library(readr)
library(data.table)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(shiny)
data_senic <- read_table2("SENIC.txt", col_names = FALSE)
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## X2 = col_double(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_integer(),
## X8 = col_integer(),
## X9 = col_integer(),
## X10 = col_integer(),
## X11 = col_integer(),
## X12 = col_double()
## )
setnames(data_senic, old = c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9", "X10", "X11", "X12"),
new = c("ID", "length_of_stay", "avg_age", "avg_risk", "culturing_ratio", "chest_xray_ratio",
"avg_beds", "school_affil", "region", "avg_daily_census", "avg_nurses", "facility_per"))
# Fixing the datatypes
data_senic$school_affil <- factor(data_senic$school_affil, levels = c(1, 2),labels = c("Yes", "No"))
data_senic$region <- factor(data_senic$region, levels = c(1, 2, 3, 4),labels = c("NE", "NC", "S", "W"))
quant_cal <- function(x){
#x <- mtcars$hp
q1 <- as.numeric(quantile(x, probs = c(0.25)))
q3 <- as.numeric(quantile(x, probs = c(0.75)))
iqr <- q3-q1
upper_value <- q3+(1.5*iqr)
lower_value <- q1-(1.5*iqr)
outliers_index <- which(x < lower_value | x > upper_value)
#which(x %in% boxplot(x)$out) # must be equal to above
return(outliers_index)
}
ggplot(data=data_senic, aes(x=avg_risk)) + geom_density() + geom_point(data = data_senic[quant_cal(data_senic$avg_risk), c("avg_risk")], aes(y = 0, x = avg_risk), shape=18, size=2)
Analysis: We see that the number of outliers are fairly small, however this visualization is not very insightful
data_senic_numeric <- data_senic[,c("ID", "length_of_stay", "avg_age", "avg_risk", "culturing_ratio", "chest_xray_ratio", "avg_beds", "avg_daily_census", "avg_nurses", "facility_per")]
p1 <- ggplot(data=data_senic_numeric, aes(x=length_of_stay)) + geom_density() + geom_point(data = data_senic_numeric[quant_cal(data_senic_numeric$length_of_stay), c("length_of_stay")], aes(y = 0, x = length_of_stay), shape=18, size=2)
p2 <- ggplot(data=data_senic_numeric, aes(x=avg_age)) + geom_density() + geom_point(data = data_senic_numeric[quant_cal(data_senic_numeric$avg_age), c("avg_age")], aes(y = 0, x = avg_age), shape=18, size=2)
p3 <- ggplot(data=data_senic_numeric, aes(x=culturing_ratio)) + geom_density() + geom_point(data = data_senic_numeric[quant_cal(data_senic_numeric$culturing_ratio), c("culturing_ratio")], aes(y = 0, x = culturing_ratio), shape=18, size=2)
p4 <- ggplot(data=data_senic_numeric, aes(x=chest_xray_ratio)) + geom_density() + geom_point(data = data_senic_numeric[quant_cal(data_senic_numeric$chest_xray_ratio), c("chest_xray_ratio")], aes(y = 0, x = chest_xray_ratio), shape=18, size=2)
p5 <- ggplot(data=data_senic_numeric, aes(x=avg_beds)) + geom_density() + geom_point(data = data_senic_numeric[quant_cal(data_senic_numeric$avg_beds), c("avg_beds")], aes(y = 0, x = avg_beds), shape=18, size=2)
p6 <- ggplot(data=data_senic_numeric, aes(x=avg_daily_census)) + geom_density() + geom_point(data = data_senic_numeric[quant_cal(data_senic_numeric$avg_daily_census), c("avg_daily_census")], aes(y = 0, x = avg_daily_census), shape=18, size=2)
p7 <- ggplot(data=data_senic_numeric, aes(x=avg_nurses)) + geom_density() + geom_point(data = data_senic_numeric[quant_cal(data_senic_numeric$avg_nurses), c("avg_nurses")], aes(y = 0, x = avg_nurses), shape=18, size=2)
p8 <- ggplot(data=data_senic_numeric, aes(x=facility_per)) + geom_density() # contains no outliers
p9 <- ggplot(data=data_senic_numeric, aes(x=avg_risk)) + geom_density() + geom_point(data = data_senic_numeric[quant_cal(data_senic_numeric$avg_risk), c("avg_risk")], aes(y = 0, x = avg_risk), shape=18, size=2)
# final output
grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9) # facility_per does not contain any outlier
Analysis: Most parameters are skewed towards right, while only age and risk are close to normally distributed
ggplot(data = data_senic, aes(x = avg_nurses, y = avg_risk)) + geom_point(aes(color = avg_beds))
Analysis: We see trend between number of nurses and risk, as the number of nurses increases risk decrease but this only happens when the average nurse >200, this may be due to the fact that medical facilities with higher risks are higher in number than low risk hospital. The risk of this colour profile is its hard to distinguish between the levels of beds.
ggplotly(p9)
Analysis: The ability to pan and zoom are now present.
data = data_senic_numeric[quant_cal(data_senic_numeric$avg_risk), c("avg_risk")]
data_senic %>% select(avg_risk) %>% plot_ly(x = ~avg_risk, type = "histogram") %>%
add_markers(x = data$avg_risk, y = 0, showlegend = FALSE, marker = list(color = "red", symbol = c('diamond')))
library(shiny)
ui <- fluidPage(
# Application title
titlePanel("Select the variable to plot"),
sidebarPanel(
column(3,checkboxGroupInput(label = "variable", "Select Column to Plot",
choices = c("length_of_stay", "avg_age", "avg_risk", "culturing_ratio", "chest_xray_ratio", "avg_beds", "avg_daily_census", "avg_nurses", "facility_per"),
selected = "length_of_stay"))),
sliderInput("bw_adjust", label = "Bandwidth adjustment:", min = 0.2, max = 2, value = 1, step = 0.2),
mainPanel(plotOutput('coolplot'),width = '40%'))
server <- shinyServer(function(input, output) {
plotOutput$coolplot<-renderPlot({
gg <- ifelse(checkboxGroupInput$variable == "length_of_stay", p1,
ifelse(checkboxGroupInput$variable == "avg_age", p2,
ifelse(checkboxGroupInput$variable == "culturing_ratio", p3,
ifelse(checkboxGroupInput$variable == "chest_xray_ratio", p4,
ifelse(checkboxGroupInput$variable == "avg_beds", p5,
ifelse(checkboxGroupInput$variable == "avg_daily_census", p6,
ifelse(checkboxGroupInput$variable == "avg_nurses", p7,
ifelse(checkboxGroupInput$variable == "facility_per", p8, p9))))))))
plot(gg)
})
})
shinyApp(ui, server)